#!/usr/bin/env python3
# -*- coding:UTF-8 -*-
# Author: nigo
import pandas as pd
import numpy as np
import plotly.express as px


def init_data(df):
    """Group unit prices by route.

    Each row of ``df`` is read through its ``from``, ``to`` and ``price``
    columns. The route key is the string ``"<from>-<to>"``.

    Returns a dict mapping each route key to the list of prices seen for
    that route, in row order.
    """
    prices_by_route = {}
    for _, record in df.iterrows():
        route = record['from'] + '-' + record['to']
        # setdefault creates the list the first time a route is seen.
        prices_by_route.setdefault(route, []).append(record['price'])
    return prices_by_route


def calc_rate(prices):
    """Return the coefficient of variation of ``prices``.

    The values are converted to a NumPy array; the result is the array's
    standard deviation (``ndarray.std`` with its default settings) divided
    by its mean.
    """
    values = np.array(prices)
    mean, spread = values.mean(), values.std()
    return spread / mean

def sigma3(prices):
    """Return the suspicious values in ``prices`` using the 3-sigma rule.

    A value is reported when its absolute distance from the mean is
    strictly greater than three times the standard deviation of all the
    values. Matching values are returned as a list, in input order.
    """
    values = np.array(prices)
    center = values.mean()
    threshold = 3 * values.std()  # three standard deviations
    return [v for v in values if abs(v - center) > threshold]


if __name__ == "__main__":
    # Load the waybill records, reading the `from` and `to` columns as text.
    waybills = pd.read_csv('waybill.csv', converters={'from': str, 'to': str})
    # Build the per-route price lists, then examine each route in turn.
    for route, route_prices in init_data(waybills).items():
        # Coefficient of variation for the route (computed but not printed).
        rate = calc_rate(route_prices)
        # Print the route together with its 3-sigma suspicious prices.
        print(route, sigma3(route_prices))

